###############################################################################
###############################################################################
############## FIRST ANALYSIS SCRIPT ##########################################
############## DESCRIPTIVE PLOTS ##############################################
############## Correlations ###############################################
###############################################################################

library(xtable)
library(ggplot2)
library(gridExtra)
library(zoo)
library(plotrix)
library(dplyr) # %>%, group_by() and summarise() used in the over-time and EP-group sections
load("Data/EP_debates_07062023.Rdata")


#### LEFT RIGHT U-CURVE FIGURE 1 IN PAPER ####
# Collapses the speech-level data in EP_debates to one row per national party
# (CHES_Party) and plots the standardized mean anti-establishment score
# (embed_dict) against the standardized mean left-right position (lrgen).
# Every aggregate() call below groups by the same vector, so the results come
# back in the same party order and can be combined column-wise with cbind().

# Mean anti-establishment score per party.
group_averages <- aggregate(EP_debates$embed_dict,
                            by = list(EP_debates$CHES_Party),
                            FUN = mean)
colnames(group_averages) <- c("CHES_party", "embed_dict")

# Mean left-right position per party.
group_averages2 <- aggregate(EP_debates$lrgen,
                             by = list(EP_debates$CHES_Party),
                             FUN = mean)
colnames(group_averages2) <- c("CHES_party", "lrgen")

# Number of speeches per party (sum of a constant 1 per speech).
EP_debates$count <- 1
group_averages3 <- aggregate(EP_debates$count,
                             by = list(EP_debates$CHES_Party),
                             FUN = sum)

# Challenger status per party: a party is labelled "Challenger" only when the
# mean of `challenge` over its speeches is exactly 1.
# NOTE(review): assumes `challenge` is coded 0/1 -- confirm.
group_averages4 <- aggregate(EP_debates$challenge,
                             by = list(EP_debates$CHES_Party),
                             FUN = mean)
group_averages4$challenge <- ifelse(group_averages4$x == 1,
                                    "Challenger", "Mainstream")

group_averages4 <- cbind(group_averages,
                         group_averages2$lrgen,
                         group_averages3$x,
                         group_averages4$challenge)
colnames(group_averages4)[3:5] <- c("lrgen", "Speeches", "Party")

# Keep only parties with more than 100 speeches.
group_averages5 <- subset(group_averages4, group_averages4$Speeches > 100)


# Parties whose names are printed in the figure; all others get a blank label.
keep_big <- c("PSD", "LAB", "CONS", "PS", "FI", "CDU", "PP", "PDL", "SPD", "PSOE",
              "FN", "UMP", "KDH", "UKIP", "IU", "VERTS", "M5S", "AfD", "ICV", "KKE", "PVV")

group_averages5$party_bin <- group_averages5$CHES_party %in% keep_big

# as.character() guards against CHES_party being a factor, in which case
# ifelse() would return the integer level codes instead of the party names.
group_averages5$new_names <- ifelse(group_averages5$party_bin,
                                    as.character(group_averages5$CHES_party),
                                    " ")

group_averages5$Party <- as.factor(group_averages5$Party)

# scale() standardizes across the plotted parties. The label jitter is seeded
# so that the saved figure is identical on every run.
p_parties <- ggplot(group_averages5, aes(scale(lrgen), scale(embed_dict))) +
  geom_point(aes(size = Speeches, shape = Party), alpha = 0.4) +
  geom_smooth(method = "loess", color = "black", alpha = 0.2) +
  theme_classic() +
  labs(title = "Anti-Establishment Rhetoric",
       subtitle = "Per National Party",
       x = "Party Position Left-Right \n (Standardized and Centered around 0)",
       y = "Anti-Establishment Rhetoric Score \n (Standardized and Centered around 0)") +
  geom_text(aes(label = new_names),
            position = position_jitter(width = .25, height = .5, seed = 1)) +
  theme(text = element_text(size = 16))

p_parties

ggsave("Figures/ucurve_190224.png", p_parties, width = 230, height = 160, device='png', units = "mm", dpi=350, scale = 1)


#### EU-position Curve (Appendix) #######
# Same party-level aggregation as the left-right figure, but with the mean
# EU position (eu_position) on the x axis. Uses `keep_big`, which is defined
# in the left-right section of this script. Every aggregate() call groups by
# the same vector, so the rows line up for cbind().

# Mean anti-establishment score per party.
group_averages <- aggregate(EP_debates$embed_dict,
                            by = list(EP_debates$CHES_Party),
                            FUN = mean)
colnames(group_averages) <- c("CHES_party", "embed_dict")

# Mean EU position per party.
group_averages2 <- aggregate(EP_debates$eu_position,
                             by = list(EP_debates$CHES_Party),
                             FUN = mean)
colnames(group_averages2) <- c("CHES_party", "eu_position")

# Number of speeches per party (sum of a constant 1 per speech).
EP_debates$count <- 1
group_averages3 <- aggregate(EP_debates$count,
                             by = list(EP_debates$CHES_Party),
                             FUN = sum)

# "Challenger" only when the mean of `challenge` over a party's speeches is
# exactly 1. NOTE(review): assumes `challenge` is coded 0/1 -- confirm.
group_averages4 <- aggregate(EP_debates$challenge,
                             by = list(EP_debates$CHES_Party),
                             FUN = mean)
group_averages4$challenge <- ifelse(group_averages4$x == 1,
                                    "Challenger", "Mainstream")

group_averages4 <- cbind(group_averages,
                         group_averages2$eu_position,
                         group_averages3$x,
                         group_averages4$challenge)
colnames(group_averages4)[3:5] <- c("eu_position", "Speeches", "Party")

# Keep only parties with more than 100 speeches.
group_averages5 <- subset(group_averages4, group_averages4$Speeches > 100)

group_averages5$party_bin <- group_averages5$CHES_party %in% keep_big

# as.character() guards against CHES_party being a factor, in which case
# ifelse() would return the integer level codes instead of the party names.
group_averages5$new_names <- ifelse(group_averages5$party_bin,
                                    as.character(group_averages5$CHES_party),
                                    " ")

group_averages5$Party <- as.factor(group_averages5$Party)

# The label jitter is seeded so that the figure is identical on every run.
p_parties_eu <- ggplot(group_averages5, aes(scale(eu_position), scale(embed_dict))) +
  geom_point(aes(size = Speeches, shape = Party), alpha = 0.4) +
  geom_smooth(method = "loess", color = "black", alpha = 0.2) +
  theme_classic() +
  labs(title = "Anti-Establishment Rhetoric",
       subtitle = "Per National Party",
       x = "Party Position Anti - Pro-EU \n (Standardized and Centered around 0)",
       y = "Anti-Establishment Score \n (Standardized and Centered around 0)") +
  geom_text(aes(label = new_names),
            position = position_jitter(width = .1, height = .9, seed = 1)) +
  theme(text = element_text(size = 16))


p_parties_eu


#### GAL TAN U CURVE ###############
# Same party-level aggregation as the left-right figure, but with the mean
# GAL-TAN position (galtan) on the x axis. Uses `keep_big` and `p_parties_eu`,
# both defined earlier in this script. Every aggregate() call groups by the
# same vector, so the rows line up for cbind().

# Mean anti-establishment score per party.
group_averages <- aggregate(EP_debates$embed_dict,
                            by = list(EP_debates$CHES_Party),
                            FUN = mean)
colnames(group_averages) <- c("CHES_party", "embed_dict")

# Mean GAL-TAN position per party.
group_averages2 <- aggregate(EP_debates$galtan,
                             by = list(EP_debates$CHES_Party),
                             FUN = mean)
colnames(group_averages2) <- c("CHES_party", "galtan_position")

# Number of speeches per party (sum of a constant 1 per speech).
EP_debates$count <- 1
group_averages3 <- aggregate(EP_debates$count,
                             by = list(EP_debates$CHES_Party),
                             FUN = sum)

# "Challenger" only when the mean of `challenge` over a party's speeches is
# exactly 1. NOTE(review): assumes `challenge` is coded 0/1 -- confirm.
group_averages4 <- aggregate(EP_debates$challenge,
                             by = list(EP_debates$CHES_Party),
                             FUN = mean)
group_averages4$challenge <- ifelse(group_averages4$x == 1,
                                    "Challenger", "Mainstream")

group_averages4 <- cbind(group_averages,
                         group_averages2$galtan_position,
                         group_averages3$x,
                         group_averages4$challenge)
colnames(group_averages4)[3:5] <- c("galtan_position", "Speeches", "Party")

# Keep only parties with more than 100 speeches.
group_averages5 <- subset(group_averages4, group_averages4$Speeches > 100)

group_averages5$party_bin <- group_averages5$CHES_party %in% keep_big

# as.character() guards against CHES_party being a factor, in which case
# ifelse() would return the integer level codes instead of the party names.
group_averages5$new_names <- ifelse(group_averages5$party_bin,
                                    as.character(group_averages5$CHES_party),
                                    " ")

group_averages5$Party <- as.factor(group_averages5$Party)

# The label jitter is seeded so that the saved figure is identical on every run.
p_parties_galtan <- ggplot(group_averages5, aes(scale(galtan_position), scale(embed_dict))) +
  geom_point(aes(size = Speeches, shape = Party), alpha = 0.4) +
  geom_smooth(method = "loess", color = "black", alpha = 0.2) +
  theme_classic() +
  labs(title = "Anti-Establishment Rhetoric",
       subtitle = "Per National Party",
       x = "Party Position GAL - TAN \n (Standardized and Centered around 0)",
       y = "Anti-Establishment Score \n (Standardized and Centered around 0)") +
  geom_text(aes(label = new_names),
            position = position_jitter(width = .1, height = .9, seed = 1)) +
  theme(text = element_text(size = 16))

p_parties_galtan


# Appendix figure: EU-position and GAL-TAN panels side by side.
ucurves <- grid.arrange(p_parties_eu, p_parties_galtan,
                        ncol = 2)

ggsave("Figures/ucurves_appendix_190224.png", ucurves, width = 230, height = 130, device='png', units = "mm", dpi=350, scale = 1.5)

### CREATE OVER TIME PLOTS
# Quarterly mean anti-establishment score (embed_dict), split by the
# `challenge` indicator, drawn as a faint raw line plus a loess smooth.
# %>%, group_by() and summarise() come from dplyr; as.yearqtr() from zoo.

# Year-quarter of each speech; also used by the issue-emphasis plots below.
EP_debates$year_qrt <- as.yearqtr(EP_debates$date)

# .groups = "drop" returns an ungrouped table (and silences the regrouping
# message summarise() prints after a multi-variable group_by()).
time_embed <- EP_debates %>%
  group_by(year_qrt, challenge) %>%
  summarise(mean = mean(embed_dict, na.rm = TRUE), .groups = "drop")

# scale(mean) standardizes over all quarter-by-group means jointly.
# The colour breaks "0"/"1" are the factor levels of `challenge`.
# NOTE(review): `size` for lines is deprecated in favour of `linewidth` from
# ggplot2 3.4.0 on; switch once the minimum ggplot2 version is confirmed.
time_AE_plot <- ggplot(time_embed,
                       aes(x = year_qrt, y = scale(mean),
                           color = as.factor(challenge))) +
  geom_line(size = .1, alpha = 0.2) +
  scale_color_manual(name = "Party",
                     breaks = c("0", "1"),
                     values = c("black", "darkgray"),
                     labels = c("Mainstream", "Challenger")) +
  geom_smooth(method = loess, size = .9, alpha = .6) +
  labs(title = "Anti-Establishment Rhetoric",
       subtitle = "Challenger and Mainstream Parties over Time",
       x = "Time",
       y = "Anti-Establishment Score  \n (Standardized and Centered around 0)") +
  theme_classic() +
  theme(text = element_text(size = 16))

time_AE_plot

ggsave("Figures/time_AE_200224.png", time_AE_plot, width = 130, height = 130, device='png', units = "mm", dpi=300, scale = 1.3)



# Issue-emphasis panels (immigration, austerity, EU integration): quarterly
# means per `challenge` group, one panel each, combined into a single figure.
# Relies on EP_debates$year_qrt, which is created in the over-time section
# above. %>%, group_by() and summarise() come from dplyr.

# Draws one over-time panel from a table with columns year_qrt, challenge and
# mean: a faint raw line plus a loess smooth per group.
#   quarterly_means  table of quarterly group means
#   title            panel title
#   y_label          y-axis label
#   show_legend      if FALSE, the colour legend is hidden
# Returns the ggplot object.
plot_emphasis_over_time <- function(quarterly_means, title, y_label,
                                    show_legend = FALSE) {
  panel <- ggplot(quarterly_means,
                  aes(x = year_qrt, y = scale(mean),
                      color = as.factor(challenge))) +
    geom_line(size = .1, alpha = 0.2) +
    # Same legend wording in every panel: "Mainstream" (0) vs "Challenger" (1).
    scale_color_manual(name = "Party",
                       breaks = c("0", "1"),
                       values = c("black", "darkgray"),
                       labels = c("Mainstream", "Challenger")) +
    geom_smooth(method = loess, size = .9, alpha = .6) +
    labs(title = title,
         subtitle = "Challenger and Mainstream Parties over Time",
         x = "Time",
         y = y_label) +
    theme_classic()
  if (!show_legend) {
    panel <- panel + theme(legend.position = "none")
  }
  panel + theme(text = element_text(size = 16))
}

# Immigration emphasis (immi_dict).
time_immi <- EP_debates %>%
  group_by(year_qrt, challenge) %>%
  summarise(mean = mean(immi_dict, na.rm = TRUE), .groups = "drop")

time_immi_plot <- plot_emphasis_over_time(
  time_immi,
  title = "Immigration Emphasis",
  y_label = "Immigration Emphasis \n (Standardized and Centered around 0)"
)

time_immi_plot

# Austerity emphasis (aut_dict).
time_aut <- EP_debates %>%
  group_by(year_qrt, challenge) %>%
  summarise(mean = mean(aut_dict, na.rm = TRUE), .groups = "drop")

time_aut_plot <- plot_emphasis_over_time(
  time_aut,
  title = "Austerity Emphasis",
  y_label = "Austerity Emphasis Score \n (Standardized and Centered around 0)"
)

time_aut_plot

# EU-integration emphasis (int_dict). This is the right-most panel of the
# combined figure and the only one that keeps its legend.
time_int <- EP_debates %>%
  group_by(year_qrt, challenge) %>%
  summarise(mean = mean(int_dict, na.rm = TRUE), .groups = "drop")

time_int_plot <- plot_emphasis_over_time(
  time_int,
  title = "EU-Integration Emphasis",
  y_label = "EU-Integration Emphasis Score \n (Standardized and Centered around 0)",
  show_legend = TRUE
)

time_int_plot


time_plots <- grid.arrange(time_aut_plot, time_immi_plot, time_int_plot,
                           ncol = 3)

ggsave("Figures/time_plots_issues_200224.png", time_plots, width = 230, height = 130, device='png', units = "mm", dpi=300, scale = 1.8)



#### CREATE PLOTS FOR EP GROUPS ####
# Mean standardized anti-establishment score per European Parliament group,
# with an interval of +/- 1.96 standard errors.

# Recode the 15 existing factor levels into 11 group labels. The assignment is
# positional: the i-th new label replaces the i-th existing level, and repeated
# labels merge levels. The levels are printed before and after so the mapping
# can be checked by eye.
# NOTE(review): this depends on the level order stored in the loaded data --
# confirm against the first printout whenever the data file changes.
levels(EP_debates$EU_party)
levels(EP_debates$EU_party) <- c("NGL", "EFD/EFDD", "ECR",
                                "EDD", "ALDE",  "EPP",
                                "Greens","S&D", "ID",
                                "No Group", "UEN", "EFD/EFDD",
                                "NGL", "Greens", "No Group")
levels(EP_debates$EU_party)

# Standardize over all speeches, so group means are in pooled SD units.
EP_debates$embed_dict_scale <- scale(EP_debates$embed_dict)

# Per-group mean and standard error (sd / sqrt(n)).
data_groups <- EP_debates %>%
  group_by(EU_party) %>%
  summarise(m = mean(embed_dict_scale),
            SE = sd(embed_dict_scale) / sqrt(length(embed_dict_scale)))

# Display order of the groups on the x axis.
group_order <- c('No Group', 'NGL', 'Greens', 'S&D', 'EPP', "ALDE",
                 "ECR", "EFD/EFDD", "UEN", "EDD", "ID")

# `levels =` is spelled out in full rather than relying on partial argument
# matching of `level`.
plot_main <- ggplot(data_groups,
                    aes(x = factor(EU_party, levels = group_order),
                        m,
                        fill = as.factor(EU_party))) +
  geom_point(stat = "identity", color = "black",
             position = position_dodge()) +
  geom_errorbar(aes(ymin = m - 1.96 * SE, ymax = m + 1.96 * SE), width = .5,
                position = position_dodge(.9)) +
  theme_classic() +
  labs(title = "Anti-Establishment Rhetoric ",
       subtitle = "Per Parliamentary Group",
       x = "Parliamentary Groups \n(loosely order from Left-Right)",
       y = "Anti-Establishment Rhetoric Score \n(Standardized and Centered around 0)") +
  theme(text = element_text(size = 16)) +
  theme(legend.position = "none")

plot_main

ggsave("Figures/group_plot_080623.png", plot_main, width = 230, height = 130, device='png', units = "mm", dpi=300, scale = 1.8)


# Last checked and run on 08.06.2023 ####

############### Correlation Matrices Between Measures ####
# Correlations between the four dictionary measures at three levels of
# aggregation (debate means, monthly means, individual speeches), each written
# to a LaTeX table under Tables/.

# The measure columns are selected by name everywhere, so the row/column labels
# below always match the data regardless of column positions in EP_debates.
measure_cols <- c("embed_dict", "immi_dict", "int_dict", "aut_dict")
measure_labels <- c("Anti-Establishment Rhetoric", "Immigration",
                    "EU-Integration", "Austerity")


#### WITHIN-DEBATE CORRELATIONS #####

debate_averages <- aggregate(EP_debates[measure_cols],
                             by = list(EP_debates$debate),
                             FUN = mean)
colnames(debate_averages) <- c("debate", measure_cols)
debate_cors <- cor(debate_averages[measure_cols])
dimnames(debate_cors) <- list(measure_labels, measure_labels)
print(xtable(debate_cors), type="latex", file = "Tables/debate_cors.tex")

#### WITHIN MONTH CORRELATIONS
# NOTE(review): `year_mon` is not created anywhere in this script (only
# `year_qrt` is); presumably it is already a column of the loaded data --
# confirm.
time_averages <- aggregate(EP_debates[measure_cols],
                           by = list(EP_debates$year_mon),
                           FUN = mean)
colnames(time_averages) <- c("date", measure_cols)
time_cors <- cor(time_averages[measure_cols])
dimnames(time_cors) <- list(measure_labels, measure_labels)
print(xtable(time_cors), type="latex", file = "Tables/time_cors.tex")

### WITHIN_SPEECH CORRELATIONS ###
# Previously selected as columns 63:66 by position.
# NOTE(review): assumes those positions held the four measures in the order of
# `measure_cols` -- confirm against the data.
speech_cors <- cor(EP_debates[measure_cols])
dimnames(speech_cors) <- list(measure_labels, measure_labels)
print(xtable(speech_cors), type="latex", file = "Tables/speech_cors.tex")


# Last checked and run on 12.03.2023 ####
# Updated Mainstream instead of Dominant on 20.02.24
